require(ClusterR)
## Loading required package: ClusterR
## Loading required package: gtools
require(scales)
## Loading required package: scales
require(CytoML)
## Loading required package: CytoML
library(reshape2)


# https://github.com/RGLab/flowWorkspace/issues/231




popsOfInterest = c("effector memory", "central memory", "naive", "effector")


gateKmeansWsp = function(gs,
                         fcsFile,
                         outputDir,
                         min = -20,
                         max = 250,
                         k = 4,
                         num_init = 150,
                         max_iters = 5000,
                         nodesOfInterest = c("Helper Tcells-CD4+", "cytotoxic Tcells-CD8+"),
                         markersToCluster = c("CCR7", "CD45RA", "CD28"),
                         addComp = FALSE) {
  dir.create(outputDir)
  outputRoot = paste0(outputDir, fcsFile)
  clustFile = paste0(outputRoot, ".clusterData")
  
   combo = data.frame(ht = getIndiceMat(gs, nodesOfInterest[1])[, 1],
                       ct = getIndiceMat(gs, nodesOfInterest[1])[, 1])
    combo$MDEF = combo$ct | combo$ht
    
    fullDef=data.frame(MDEF=combo$MDEF)
    allNodes=getNodes(gs)
    for(node in allNodes){
      tmp = getIndiceMat(gs, node)[, 1]
      fullDef=cbind(fullDef,tmp)
      
    }
    colnames(fullDef)=allNodes
    subto=5000
    fullDef=fullDef[1:min(subto,length(fullDef[,1])),]
    gh <- gs[[1]]
    subdata = getData(gh)[combo$MDEF, ]
    
    channels = colnames(subdata)[grep("Comp", colnames(subdata))]
    # markersToCluster= markernames(subdata)[grep("Comp", colnames(subdata))]
  if (!file.exists(clustFile)) {
   
    # getMar
    # for (marker in markersToCluster) {
    #   if (addComp) {
    #     channels = c(channels, c(paste0(
    #       "Comp-",
    #       getChannelMarker(frame, marker)$name
    #     )))
    #   } else{
    #     channels = c(channels, c(paste0(
    #       getChannelMarker(frame, marker)$name
    #     )))
    #   }
    # }
    # print(getData(gh))
    
    t = as.data.frame(subdata@exprs)[, channels]
    for (channel in channels) {
      t[, channel] = squish(t[, channel], range = c(min, max))
    }
    clust = center_scale(t[, channels])
    clust =clust[1:min(subto,length(clust[,1])),]

    # colnames(clust) = markersToCluster
    print(paste0("clustering sample ", fcsFile))
    km_rc = KMeans_rcpp(
      clust,
      clusters = k,
      num_init = num_init,
      max_iters = max_iters,
      tol = 1e-06,
      initializer = 'optimal_init',
      # threads = 4,
      verbose = F,
      seed = 42
    )
    
    clust = as.data.frame(clust)
    colnames(clust)=channels

    clust$KMEANS_CLUSTER = km_rc$clusters
        print(paste0("running phenograph for sample ", fcsFile))

    clusterPhenograph = cytof_cluster(xdata = clust, method = "Rphenograph")
    clust$PHENOGRAPH = clusterPhenograph
    
            print(paste0("running tsne for sample ", fcsFile))

            
    tsne <- cytof_dimReduction(data = clust,
                               method = "tsne",
                               out_dim = 2)
    
 cluster_DensVM <- cytof_cluster(xdata = clust,
                                 ydata = tsne, method = "DensVM")
    clust = cbind(clust,tsne)
    
    clust$CLUSTER_DENSVM=cluster_DensVM
    save(clust, file = clustFile)
    
  }else{
   load(clustFile) 
  }
    
    clust=cbind(clust,fullDef)
    optKN = 16
    o3_t = Optimal_Clusters_KMeans(
      clust[,channels],
      max_clusters = optKN,
      plot_clusters = TRUE,
      criterion = 'distortion_fK',
      tol = 1e-06,
      seed = 42
    )
    
    tsalpha = .5
    tsSize =1
    methods = c("KMEANS_CLUSTER", "PHENOGRAPH","CLUSTER_DENSVM")
    for (method in methods) {
      clust$CURRENT_COLOR=clust[,method]
      pb2 = ggplot(clust) +
        geom_point(
          alpha = tsalpha,
          aes(
            x = tsne_1,
            y = tsne_2,
            colour = CURRENT_COLOR
          ),
          size = tsSize
        ) +
        xlab(paste0("tsne1")) +
        ylab(paste0("tsne2"))  + ggtitle(paste0(method, "\n", samp)) +
        theme(legend.position = "bottom")
      print(pb2)
      
      # + t2
  
  }
}



require(flowCore)
## Loading required package: flowCore
require(cytofkit)
## Loading required package: cytofkit
## Loading required package: ggplot2
## Loading required package: plyr
library(ggcyto)
## Loading required package: ncdfFlow
## Loading required package: RcppArmadillo
## Loading required package: BH
## Loading required package: flowWorkspace
library(ClusterR)
library(gridExtra)
library(grid)
library(scales)
library(cluster)
library(reshape)
## 
## Attaching package: 'reshape'
## The following objects are masked from 'package:plyr':
## 
##     rename, round_any
## The following objects are masked from 'package:reshape2':
## 
##     colsplit, melt, recast
library(openCyto)

alpha = 0.05
reCluster = TRUE
runPhenograph = FALSE
inputDir = "/Volumes/Beta2/flow/testNovels/"

wsDir = "/Volumes/Beta2/flow/wsp/"
wsps <-
  list.files(wsDir,
             pattern = "wsp$",
             full = TRUE,
             recursive = TRUE)

print(paste0("Found ", length(wsps), " wsps"))
## [1] "Found 33 wsps"
#
# for (file in wsps) {
#   ws <- openWorkspace(file)
#   s = getSamples(ws)
#   print(length(s$name))
#   s=s[grepl("PANEL 1",s$name),]
#   print(length(s$name))
#
#   for (samp in s$name) {
#     if(runif(1)[1]<.15){
#       command = paste0(
#         "rsync -av \'msi:/scratch.global/lanej/flow/full/fcs/",
#         gsub(" ", "\\\ ", samp, fixed = TRUE),
#         "' ",
#         inputDir
#       )
#       system(command)
#     }
#   }
# }
markersToCluster = c("CCR7", "CD45RA", "CD28")

outputDir = "/Volumes/Beta2/flow/novelResults/"

wsMapFileFile = "/Volumes/Beta2/flow/novelResults/fcsLOCALMAP.manual.txt"
mapper = read.delim(wsMapFileFile,
                    stringsAsFactors = FALSE,
                    sep = "\t")
targets =
  list.files(inputDir,
             pattern = ".fcs$",
             full = FALSE)

for (samp in mapper$FCS) {
  if (file.exists(paste(inputDir, samp, sep = ""))) {
    wsFile = mapper[which(mapper$FCS == samp),]$WSP
    print(wsFile)
    ws <- openWorkspace(wsFile)
    
    print(samp)
    #read the fcs file
    frame = read.FCS(paste(inputDir, samp, sep = ""))
    
    id = samp
    #load the workspace for this file, apply transforms, compensation, and gates
    gs <-
      parseWorkspace(
        ws,
        #WSP file
        path = inputDir,
        #FCS file
        name = 1,
        #sample group
        subset = id[1],
        #load single fcs file
        isNcdf = FALSE,
        #not memory mapped
        compensation = compensation(keyword(frame)$`SPILL`)
      )
    
    
    min = -20
    max = 250
    k = 4
    num_init = 10
    max_iters = 5000
    # nodesOfInterest = c("Helper Tcells-CD4+", "cytotoxic Tcells-CD8+")
    nodesOfInterest = c("Helper Tcells-CD4+")
    
    markersToCluster = c("CCR7", "CD45RA", "CD28")
    addComp = TRUE
    # for (node in nodesOfInterest) {
    print(samp)
    fcsFile = samp
    outputDir = "/Volumes/Beta2/flow/novelResults/"
    gateKmeansWsp(
      gs = gs,
      fcsFile = samp,
      outputDir = "/Volumes/Beta2/flow/novelResults/",
      addComp = TRUE ,
      num_init = 10,
      max_iters = 5000
    )
    
  }
}
## [1] "/Volumes/Beta2/flow/wsp//2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs_panel1Rename.wsp"
## [1] "2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
## Parsing 1 samples
## windows version of flowJo workspace recognized.
## version X
## Creating flowSet...
## loading data: /Volumes/Beta2/flow/testNovels//2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs
## Compensating
## gating ...
## done!
## [1] "2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
## Warning in dir.create(outputDir): '/Volumes/Beta2/flow/novelResults'
## already exists
## [1] "clustering sample 2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
## [1] "running phenograph for sample 2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
##   Running PhenoGraph...
## Run Rphenograph starts:
##   -Input data of 5000 rows and 13 columns
##   -k is set to 30
##   Finding nearest neighbors...DONE ~ 0.585 s
##   Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.764 s
##   Build undirected graph from the weighted links...DONE ~ 0.361 s
##   Run louvain clustering on the graph ...DONE ~ 0.386 s
## Run Rphenograph DONE, took a total of 3.09599999999999s.
##   Return a community class
##   -Modularity value: 0.7404781 
##   -Number of clusters: 8 DONE!
## [1] "running tsne for sample 2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
##   Running t-SNE...with seed 42  DONE
##   Running DensVM...Testing kernel bandwidth for  20 points in the range min= 0.6962965  to max= 6.962965 
## This will take a while...
## Computing number of peaks for kernel bandwidth =  0.6962965 
## Computing number of peaks for kernel bandwidth =  1.026121 
## Computing number of peaks for kernel bandwidth =  1.355946 
## Computing number of peaks for kernel bandwidth =  1.685771 
## Computing number of peaks for kernel bandwidth =  2.015595 
## Computing number of peaks for kernel bandwidth =  2.34542 
## Computing number of peaks for kernel bandwidth =  2.675245 
## Computing number of peaks for kernel bandwidth =  3.005069 
## Computing number of peaks for kernel bandwidth =  3.334894 
## Computing number of peaks for kernel bandwidth =  3.664718 
## Computing number of peaks for kernel bandwidth =  3.994543 
## Computing number of peaks for kernel bandwidth =  4.324368 
## Computing number of peaks for kernel bandwidth =  4.654192 
## Computing number of peaks for kernel bandwidth =  4.984017 
## Computing number of peaks for kernel bandwidth =  5.313842 
## Computing number of peaks for kernel bandwidth =  5.643666 
## Computing number of peaks for kernel bandwidth =  5.973491 
## Computing number of peaks for kernel bandwidth =  6.303316 
## Computing number of peaks for kernel bandwidth =  6.63314 
## Computing number of peaks for kernel bandwidth =  6.962965 
##  DONE!

## [1] "/Volumes/Beta2/flow/wsp//2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs_panel1Rename.wsp"
## [1] "2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
## Parsing 1 samples
## windows version of flowJo workspace recognized.
## version X
## Creating flowSet...
## loading data: /Volumes/Beta2/flow/testNovels//2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs
## Compensating
## gating ...
## done!
## [1] "2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
## Warning in dir.create(outputDir): '/Volumes/Beta2/flow/novelResults'
## already exists
## [1] "clustering sample 2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
## [1] "running phenograph for sample 2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
##   Running PhenoGraph...
## Run Rphenograph starts:
##   -Input data of 5000 rows and 13 columns
##   -k is set to 30
##   Finding nearest neighbors...DONE ~ 0.437 s
##   Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.84 s
##   Build undirected graph from the weighted links...DONE ~ 0.362 s
##   Run louvain clustering on the graph ...DONE ~ 0.321 s
## Run Rphenograph DONE, took a total of 2.96000000000015s.

##   Return a community class
##   -Modularity value: 0.7555513 
##   -Number of clusters: 8 DONE!
## [1] "running tsne for sample 2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
##   Running t-SNE...with seed 42  DONE
##   Running DensVM...Testing kernel bandwidth for  20 points in the range min= 0.7959007  to max= 7.959007 
## This will take a while...
## Computing number of peaks for kernel bandwidth =  0.7959007 
## Computing number of peaks for kernel bandwidth =  1.172906 
## Computing number of peaks for kernel bandwidth =  1.549912 
## Computing number of peaks for kernel bandwidth =  1.926917 
## Computing number of peaks for kernel bandwidth =  2.303923 
## Computing number of peaks for kernel bandwidth =  2.680929 
## Computing number of peaks for kernel bandwidth =  3.057934 
## Computing number of peaks for kernel bandwidth =  3.43494 
## Computing number of peaks for kernel bandwidth =  3.811945 
## Computing number of peaks for kernel bandwidth =  4.188951 
## Computing number of peaks for kernel bandwidth =  4.565957 
## Computing number of peaks for kernel bandwidth =  4.942962 
## Computing number of peaks for kernel bandwidth =  5.319968 
## Computing number of peaks for kernel bandwidth =  5.696973 
## Computing number of peaks for kernel bandwidth =  6.073979 
## Computing number of peaks for kernel bandwidth =  6.450985 
## Computing number of peaks for kernel bandwidth =  6.82799 
## Computing number of peaks for kernel bandwidth =  7.204996 
## Computing number of peaks for kernel bandwidth =  7.582001 
## Computing number of peaks for kernel bandwidth =  7.959007 
##  DONE!

## [1] "/Volumes/Beta2/flow/wsp//2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs_panel1Rename.wsp"
## [1] "2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
## Parsing 1 samples
## windows version of flowJo workspace recognized.
## version X
## Creating flowSet...
## loading data: /Volumes/Beta2/flow/testNovels//2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs
## Compensating
## gating ...
## done!
## [1] "2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
## Warning in dir.create(outputDir): '/Volumes/Beta2/flow/novelResults'
## already exists
## [1] "clustering sample 2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
## [1] "running phenograph for sample 2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
##   Running PhenoGraph...
## Run Rphenograph starts:
##   -Input data of 5000 rows and 13 columns
##   -k is set to 30
##   Finding nearest neighbors...DONE ~ 0.547 s
##   Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.638 s
##   Build undirected graph from the weighted links...DONE ~ 0.362 s
##   Run louvain clustering on the graph ...DONE ~ 0.338 s
## Run Rphenograph DONE, took a total of 2.88499999999999s.

##   Return a community class
##   -Modularity value: 0.7106201 
##   -Number of clusters: 12 DONE!
## [1] "running tsne for sample 2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
##   Running t-SNE...with seed 42  DONE
##   Running DensVM...Testing kernel bandwidth for  20 points in the range min= 0.849329  to max= 8.49329 
## This will take a while...
## Computing number of peaks for kernel bandwidth =  0.849329 
## Computing number of peaks for kernel bandwidth =  1.251643 
## Computing number of peaks for kernel bandwidth =  1.653956 
## Computing number of peaks for kernel bandwidth =  2.05627 
## Computing number of peaks for kernel bandwidth =  2.458584 
## Computing number of peaks for kernel bandwidth =  2.860898 
## Computing number of peaks for kernel bandwidth =  3.263211 
## Computing number of peaks for kernel bandwidth =  3.665525 
## Computing number of peaks for kernel bandwidth =  4.067839 
## Computing number of peaks for kernel bandwidth =  4.470153 
## Computing number of peaks for kernel bandwidth =  4.872466 
## Computing number of peaks for kernel bandwidth =  5.27478 
## Computing number of peaks for kernel bandwidth =  5.677094 
## Computing number of peaks for kernel bandwidth =  6.079408 
## Computing number of peaks for kernel bandwidth =  6.481721 
## Computing number of peaks for kernel bandwidth =  6.884035 
## Computing number of peaks for kernel bandwidth =  7.286349 
## Computing number of peaks for kernel bandwidth =  7.688663 
## Computing number of peaks for kernel bandwidth =  8.090976 
## Computing number of peaks for kernel bandwidth =  8.49329 
##  DONE!

## [1] "/Volumes/Beta2/flow/wsp//2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs_panel1Rename.wsp"
## [1] "2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
## Parsing 1 samples
## windows version of flowJo workspace recognized.
## version X
## Creating flowSet...
## loading data: /Volumes/Beta2/flow/testNovels//2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs
## Compensating
## gating ...
## done!
## [1] "2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
## Warning in dir.create(outputDir): '/Volumes/Beta2/flow/novelResults'
## already exists
## [1] "clustering sample 2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
## [1] "running phenograph for sample 2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
##   Running PhenoGraph...
## Run Rphenograph starts:
##   -Input data of 5000 rows and 13 columns
##   -k is set to 30
##   Finding nearest neighbors...DONE ~ 0.439 s
##   Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.741 s
##   Build undirected graph from the weighted links...DONE ~ 0.358 s
##   Run louvain clustering on the graph ...DONE ~ 0.354 s
## Run Rphenograph DONE, took a total of 2.89199999999983s.

##   Return a community class
##   -Modularity value: 0.7553768 
##   -Number of clusters: 10 DONE!
## [1] "running tsne for sample 2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
##   Running t-SNE...with seed 42  DONE
##   Running DensVM...Testing kernel bandwidth for  20 points in the range min= 0.772674  to max= 7.72674 
## This will take a while...
## Computing number of peaks for kernel bandwidth =  0.772674 
## Computing number of peaks for kernel bandwidth =  1.138677 
## Computing number of peaks for kernel bandwidth =  1.504681 
## Computing number of peaks for kernel bandwidth =  1.870684 
## Computing number of peaks for kernel bandwidth =  2.236688 
## Computing number of peaks for kernel bandwidth =  2.602691 
## Computing number of peaks for kernel bandwidth =  2.968695 
## Computing number of peaks for kernel bandwidth =  3.334698 
## Computing number of peaks for kernel bandwidth =  3.700702 
## Computing number of peaks for kernel bandwidth =  4.066705 
## Computing number of peaks for kernel bandwidth =  4.432709 
## Computing number of peaks for kernel bandwidth =  4.798712 
## Computing number of peaks for kernel bandwidth =  5.164716 
## Computing number of peaks for kernel bandwidth =  5.530719 
## Computing number of peaks for kernel bandwidth =  5.896723 
## Computing number of peaks for kernel bandwidth =  6.262726 
## Computing number of peaks for kernel bandwidth =  6.62873 
## Computing number of peaks for kernel bandwidth =  6.994733 
## Computing number of peaks for kernel bandwidth =  7.360736 
## Computing number of peaks for kernel bandwidth =  7.72674 
##  DONE!